Documentation for the analysis of mouse placental RNA-seq data at e7.5, e8.5 and e9.5.
This analysis is carried out in two steps. First, run the clustering algorithms with different number of groups (k = 3, 4 and 5). Second, get a summary table with expression levels of transcripts to validate the trends of the groups with visualization. For the second step, we can set up the following functions:
summ <- function(trans_cluster, tpm2=tpm2) {
tpm3 <- tpm2
tpm3 <- cbind(rownames(tpm2), tpm3)
rownames(tpm3) <- 1:nrow(tpm3)
colnames(tpm3)[1] <- "transcripts"
#e7.5
e7.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E7.5_2", "E7.5_3", "E7.5_4", "E7.5_5", "E7.5_6")]))
e7.5_table$time <- c("e7.5")
rownames(e7.5_table) <- 1:nrow(e7.5_table)
colnames(e7.5_table) <- c("transcripts", "mean_cts_scaled", "time")
#e8.5
e8.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E8.5_1", "E8.5_2", "E8.5_3", "E8.5_4", "E8.5_5", "E8.5_6")]))
e8.5_table$time <- c("e8.5")
rownames(e8.5_table) <- 1:nrow(e8.5_table)
colnames(e8.5_table) <- c("transcripts", "mean_cts_scaled", "time")
#e9.5
e9.5_table <- data.frame(tpm3$transcripts, rowMeans(tpm3[,c("E9.5_1", "E9.5_2", "E9.5_3", "E9.5_4", "E9.5_5")]))
e9.5_table$time <- c("e9.5")
rownames(e9.5_table) <- 1:nrow(e9.5_table)
colnames(e9.5_table) <- c("transcripts", "mean_cts_scaled", "time")
summary <- rbind(rbind(e7.5_table, e8.5_table), e9.5_table)
summary <- summary[order(summary$transcripts),]
summary <- inner_join(summary, trans_cluster, by = c("transcripts" = "name"))
summary <- inner_join(summary, t2g, by = c("transcripts" = "target_id"))
return(summary)
}
plotClus <- function(summary, title){
ascl2 <- subset(summary, summary$ext_gene == "Ascl2") #e7.5
gjb5 <- subset(summary, summary$ext_gene == "Gjb5") #e7.5
dnmt1 <- subset(summary, summary$ext_gene == "Dnmt1") #e8.5
itga4 <- subset(summary, summary$ext_gene == "Itga4") #e8.5
gjb2 <- subset(summary, summary$ext_gene == "Gjb2") #e9.5
igf2 <- subset(summary, summary$ext_gene == "Igf2") #e9.5
p <- ggplot(aes(time, mean_cts_scaled), data = summary) +
geom_line(aes(group = transcripts), alpha = 0.5, colour = "grey77") +
geom_line(stat = "summary", fun = "median", size = 2,
aes(group = 1, color = "Group median")) +
labs(title = title,
x = "Time point",
y = "Scaled mean transcript counts", color = "Legend", linetype = "Legend") +
theme(plot.title = element_text(size = 15, face = "bold"), legend.text=element_text(size=20)) +
geom_line(data = ascl2, size = 2.5, aes(group = transcripts, color = "Ascl2", linetype = "Ascl2"), alpha = 1) + #e7.5
geom_line(data = gjb5, size = 2, aes(group = transcripts, color = "Gjb5", linetype = "Gjb5" ), alpha = 1) + #e7.5
geom_line(data = dnmt1, size = 2, aes(group = transcripts, color = "Dnmt1", linetype = "Dnmt1"), alpha = 1) + #e8.5
geom_line(data = itga4, size = 2, aes(group = transcripts, color = "Itga4", linetype = "Itga4"), alpha = 1) + #e8.5
geom_line(data = gjb2, size = 2, aes(group = transcripts, color = "Gjb2", linetype = "Gjb2"), alpha = 1) + #e9.5
geom_line(data = igf2, size = 2, aes(group = transcripts, color = "Igf2", linetype = "Igf2"), alpha = 1) + #e9.5
scale_color_manual(name = "Legend", values = c("Ascl2" = "darkolivegreen4", "Gjb5" = "yellow3",
"Dnmt1" = "dodgerblue4", "Itga4" = "deepskyblue4",
"Gjb2" = "saddlebrown", "Igf2" = "salmon3",
"Group median" = "grey22")) +
scale_linetype_manual(name = "Legend", values = c("Ascl2" = "solid", "Gjb5" = "solid",
"Dnmt1" = "twodash", "Itga4" = "solid",
"Gjb2" = "solid", "Igf2" = "solid",
"Group median" = "solid")) +
guides(linetype=F,
colour=guide_legend(keywidth = 3, keyheight = 1)) +
theme(text = element_text(size=15),
legend.text=element_text(size=15),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
axis.text.x = element_text(angle=0, hjust=0.5, size = 25),
axis.text.y = element_text(size = 15)) +
facet_grid(cols = vars(value))
return(p)
}
Next, load some necessary files:
library("ggplot2", suppressMessages())
library("dplyr", suppressMessages())
library("tidyverse", suppressMessages())
t2g <- read.table("Files/t2g.txt", header = T, sep = "\t")
t2g <- t2g[order(t2g$target_id),]
coding <- read.table("Files/Mus_musculus_grcm38_coding_transcripts.txt", header = F)
tpm2 <- read.table("/work/LAS/geetu-lab/hhvu/project1_2/rna-seq/PlacentaRNA-seq/Files/tpmForClustering.txt", header = T)
Hierarchical clustering, k = 3:
hc <- hclust(dist(tpm2, method = "euclidean"), "complete")
trans_cluster <- cutree(hc, k = 3) %>% enframe()
table(trans_cluster$value)
#>
#> 1 2 3
#> 8091 7238 8242
summK3 <- summ(trans_cluster, tpm2)
summK3$value <- ifelse(summK3$value == "1", "e8.5",
ifelse(summK3$value == "2", "e9.5",
ifelse(summK3$value == "3", "e7.5", summK3$value)))
summK3$value <- ifelse(summK3$value == "e8.5", "2",
ifelse(summK3$value == "e9.5", "3",
ifelse(summK3$value == "e7.5", "1", summK3$value)))
plotClus(summK3, "Hierarchical Clustering of Transcripts, k = 3")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summK32 <- dplyr::distinct(summK3[,c("value", "transcripts")])
summK32 <- summK32[order(summK32$transcripts),]
For K-means clustering:
set.seed(123)
km <- kmeans(tpm2, centers = 3)
trans_cluster_kmeans <- km$cluster %>% enframe()
summKmeans <- summ(trans_cluster_kmeans, tpm2)
summKmeans$value <- ifelse(summKmeans$value == "1", "e9.5",
ifelse(summKmeans$value == "2", "e7.5",
ifelse(summKmeans$value == "3", "e8.5", summKmeans$value)))
summKmeans$value <- ifelse(summKmeans$value == "e8.5", "2",
ifelse(summKmeans$value == "e9.5", "3",
ifelse(summKmeans$value == "e7.5", "1", summKmeans$value)))
plotClus(summKmeans, "K-means Clustering of Transcripts, k = 3")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summKmeans2 <- dplyr::distinct(summKmeans[,c("value", "transcripts")])
summKmeans2 <- summKmeans2[order(summKmeans2$transcripts),]
t <- summKmeans2
for (i in 1:3) {
for (j in 1:3) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent agreement between hierCluster", i, "and Kmeans cluster", j, "is:", q/length(unique(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"]))*100))
}
}
#> [1] "Percent agreement between hierCluster 1 and Kmeans cluster 1 is: 76.7532152390197"
#> [1] "Percent agreement between hierCluster 1 and Kmeans cluster 2 is: 12.1815093423926"
#> [1] "Percent agreement between hierCluster 1 and Kmeans cluster 3 is: 11.0652754185877"
#> [1] "Percent agreement between hierCluster 2 and Kmeans cluster 1 is: 6.4268940798418"
#> [1] "Percent agreement between hierCluster 2 and Kmeans cluster 2 is: 76.1586948461253"
#> [1] "Percent agreement between hierCluster 2 and Kmeans cluster 3 is: 17.4144110740329"
#> [1] "Percent agreement between hierCluster 3 and Kmeans cluster 1 is: 2.23818734457032"
#> [1] "Percent agreement between hierCluster 3 and Kmeans cluster 2 is: 10.5001381597126"
#> [1] "Percent agreement between hierCluster 3 and Kmeans cluster 3 is: 87.261674495717"
For self-organizing maps:
library("kohonen")
set.seed(123)
som <- som(as.matrix(tpm2), grid = somgrid(3, 1, "rectangular"))
trans_cluster_som <- som$unit.classif %>% enframe()
trans_cluster_som$name <- row.names(tpm2)
Since this run could take time, I saved the results to reproduce the plot here.
trans_cluster_som <- read.table("Files/trans_cluster_som.txt", header = T)
summSOM <- summ(trans_cluster_som, tpm2)
summSOM$value <- ifelse(summSOM$value == "1", "e8.5",
ifelse(summSOM$value == "2", "e9.5",
ifelse(summSOM$value == "3", "e7.5", summSOM$value)))
summSOM$value <- ifelse(summSOM$value == "e8.5", "2",
ifelse(summSOM$value == "e9.5", "3",
ifelse(summSOM$value == "e7.5", "1", summSOM$value)))
plotClus(summSOM, "Self-organizing Maps of Transcripts, k = 3")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSOM2 <- dplyr::distinct(summSOM[,c("value", "transcripts")])
summSOM2 <- summSOM2[order(summSOM2$transcripts),]
t <- summSOM2
for (i in 1:3) {
for (j in 1:3) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in SOM cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in SOM cluster 1 is: 70.7761529808774"
#> [1] "Percent of hierCluster 1 in SOM cluster 2 is: 7.13532049591137"
#> [1] "Percent of hierCluster 1 in SOM cluster 3 is: 5.42466095869008"
#> [1] "Percent of hierCluster 2 in SOM cluster 1 is: 3.25616541869891"
#> [1] "Percent of hierCluster 2 in SOM cluster 2 is: 62.4280956706025"
#> [1] "Percent of hierCluster 2 in SOM cluster 3 is: 9.36089487800091"
#> [1] "Percent of hierCluster 3 in SOM cluster 1 is: 1.24250214224507"
#> [1] "Percent of hierCluster 3 in SOM cluster 2 is: 5.07376258100096"
#> [1] "Percent of hierCluster 3 in SOM cluster 3 is: 66.3521023382615"
For spectral clustering:
This clustering method required high memory and computing time, so we should run this analysis on a HPC cluster. If you are reproducing this analysis from an Iowa State cluster such as pronto, the R library Rclustool would require to run on an interactive node with X11 forwarding enabled (see https://researchit.las.iastate.edu/x-forwarding-mac-and-windows). Additionally, request high memory (for example, 128G) to run the analysis. It can take a few hours to finish running.
library("RclusTool")
set.seed(123)
sim <- computeGaussianSimilarity(tpm2, 1)
res <- spectralClustering(sim, K=3)
trans_cluster_sc <- data_frame(row.names(tpm2), res$label)
colnames(trans_cluster_sc) <- c("name", "value")
Plot of transcripts:
trans_cluster_sc <- read.table("Files/trans_cluster_sc.txt", header = T)
summSC <- summ(trans_cluster_sc, tpm2)
summSC$value <- ifelse(summSC$value == "1", "e7.5",
ifelse(summSC$value == "2", "e8.5",
ifelse(summSC$value == "3", "e9.5", summSC$value)))
summSC$value <- ifelse(summSC$value == "e8.5", "2",
ifelse(summSC$value == "e9.5", "3",
ifelse(summSC$value == "e7.5", "1", summSC$value)))
plotClus(summSC, "Spectral Clustering of Transcripts")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSC2 <- dplyr::distinct(summSC[,c("value", "transcripts")])
summSC2 <- summSC2[order(summSC2$transcripts),]
t <- summSC2
for (i in 1:3) {
for (j in 1:3) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent agreement between hierCluster", i, "and SC cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent agreement between hierCluster 1 and SC cluster 1 is: 70.3390772651473"
#> [1] "Percent agreement between hierCluster 1 and SC cluster 2 is: 11.2626728110599"
#> [1] "Percent agreement between hierCluster 1 and SC cluster 3 is: 3.42899554675903"
#> [1] "Percent agreement between hierCluster 2 and SC cluster 1 is: 4.36846450192625"
#> [1] "Percent agreement between hierCluster 2 and SC cluster 2 is: 23.1126716926572"
#> [1] "Percent agreement between hierCluster 2 and SC cluster 3 is: 33.5908330114587"
#> [1] "Percent agreement between hierCluster 3 and SC cluster 1 is: 0.830985915492958"
#> [1] "Percent agreement between hierCluster 3 and SC cluster 2 is: 3.45859039072724"
#> [1] "Percent agreement between hierCluster 3 and SC cluster 3 is: 51.3347022587269"
We can see the cluster trends agree with these of hierarchical clustering.
Hierarchical clustering, k = 4:
trans_cluster <- cutree(hc, k = 4) %>% enframe()
write.table(trans_cluster, "Files/trans_cluster_hc_k4.txt", quote = F, row.names = F, sep = "\t")
table(trans_cluster$value)
#>
#> 1 2 3 4
#> 8091 7238 4501 3741
summK3 <- summ(trans_cluster, tpm2)
summK3$value <- ifelse(summK3$value == "1", "e8.5",
ifelse(summK3$value == "2", "e9.5",
ifelse(summK3$value == "3", "e7.5", summK3$value)))
summK3$value <- ifelse(summK3$value == "e8.5", "2",
ifelse(summK3$value == "e9.5", "3",
ifelse(summK3$value == "e7.5", "1", summK3$value)))
plotClus(summK3, "Hierarchical Clustering of Transcripts, k = 4")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summK32 <- dplyr::distinct(summK3[,c("value", "transcripts")])
summK32 <- summK32[order(summK32$transcripts),]
For K-means clustering:
set.seed(123)
km <- kmeans(tpm2, centers = 4)
trans_cluster_kmeans <- km$cluster %>% enframe()
write.table(trans_cluster_kmeans, "Files/trans_cluster_kmeans_k4.txt", quote = F, row.names = F, sep = "\t")
summKmeans <- summ(trans_cluster_kmeans, tpm2)
plotClus(summKmeans, "K-means Clustering of Transcripts, k = 4")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summKmeans2 <- dplyr::distinct(summKmeans[,c("value", "transcripts")])
summKmeans2 <- summKmeans2[order(summKmeans2$transcripts),]
t <- summKmeans2
for (i in 1:4) {
for (j in 1:4) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in Kmeans cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in Kmeans cluster 1 is: 1.45542753183748"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 2 is: 55.7987822557263"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 3 is: 4.66771323914181"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 4 is: 0.577980796121935"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 1 is: 29.1631125011847"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 2 is: 2.04996796925048"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 3 is: 43.4406332453826"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 4 is: 4.43024184762873"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 1 is: 21.43536121673"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 2 is: 1.08703800884624"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 3 is: 1.38490926456543"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 4 is: 52.6292033401038"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 1 is: 0.66182054898557"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 2 is: 24.4796210893681"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 3 is: 9.01262238999646"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 4 is: 10.4880467114685"
For self-organizing maps:
library("kohonen")
set.seed(123)
som <- som(as.matrix(tpm2), grid = somgrid(4, 1, "rectangular"))
trans_cluster_som <- som$unit.classif %>% enframe()
trans_cluster_som$name <- row.names(tpm2)
Since this run could take time, I saved the results to reproduce the plot here.
trans_cluster_som <- read.table("Files/trans_cluster_som_k4.txt", header = T)
summSOM <- summ(trans_cluster_som, tpm2)
plotClus(summSOM, "Self-organizing Maps of Transcripts, k = 4")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSOM2 <- dplyr::distinct(summSOM[,c("value", "transcripts")])
summSOM2 <- summSOM2[order(summSOM2$transcripts),]
t <- summSOM2
for (i in 1:4) {
for (j in 1:4) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in SOM cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in SOM cluster 1 is: 55.672668716147"
#> [1] "Percent of hierCluster 1 in SOM cluster 2 is: 4.87057580559958"
#> [1] "Percent of hierCluster 1 in SOM cluster 3 is: 1.83421870301694"
#> [1] "Percent of hierCluster 1 in SOM cluster 4 is: 0.499953707990001"
#> [1] "Percent of hierCluster 2 in SOM cluster 1 is: 1.94223464487924"
#> [1] "Percent of hierCluster 2 in SOM cluster 2 is: 40.4989604989605"
#> [1] "Percent of hierCluster 2 in SOM cluster 3 is: 31.7049808429119"
#> [1] "Percent of hierCluster 2 in SOM cluster 4 is: 4.43930301496638"
#> [1] "Percent of hierCluster 3 in SOM cluster 1 is: 1.23410054512417"
#> [1] "Percent of hierCluster 3 in SOM cluster 2 is: 1.36887608069164"
#> [1] "Percent of hierCluster 3 in SOM cluster 3 is: 19.4387849601778"
#> [1] "Percent of hierCluster 3 in SOM cluster 4 is: 54.6830545123478"
#> [1] "Percent of hierCluster 4 in SOM cluster 1 is: 23.7653547254951"
#> [1] "Percent of hierCluster 4 in SOM cluster 2 is: 10.8477445882211"
#> [1] "Percent of hierCluster 4 in SOM cluster 3 is: 0.717882781527912"
#> [1] "Percent of hierCluster 4 in SOM cluster 4 is: 9.56153679183851"
For spectral clustering:
library("RclusTool")
set.seed(123)
sim <- computeGaussianSimilarity(tpm2, 1)
res <- spectralClustering(sim, K=4)
trans_cluster_sc <- data_frame(row.names(tpm2), res$label)
colnames(trans_cluster_sc) <- c("name", "value")
Plot of transcripts:
trans_cluster_sc <- read.table("Files/trans_cluster_sc_k4.txt", header = T)
summSC <- summ(trans_cluster_sc, tpm2)
plotClus(summSC, "Spectral Clustering of Transcripts")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSC2 <- dplyr::distinct(summSC[,c("value", "transcripts")])
summSC2 <- summSC2[order(summSC2$transcripts),]
t <- summSC2
for (i in 1:4) {
for (j in 1:4) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in SC cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in SC cluster 1 is: 2.49453308495991"
#> [1] "Percent of hierCluster 1 in SC cluster 2 is: 52.3652200030035"
#> [1] "Percent of hierCluster 1 in SC cluster 3 is: 11.0529868376645"
#> [1] "Percent of hierCluster 1 in SC cluster 4 is: 0.420029649151705"
#> [1] "Percent of hierCluster 2 in SC cluster 1 is: 63.1024096385542"
#> [1] "Percent of hierCluster 2 in SC cluster 2 is: 1.07431935246505"
#> [1] "Percent of hierCluster 2 in SC cluster 3 is: 6.80457838916308"
#> [1] "Percent of hierCluster 2 in SC cluster 4 is: 6.85126260916661"
#> [1] "Percent of hierCluster 3 in SC cluster 1 is: 7.72676371780515"
#> [1] "Percent of hierCluster 3 in SC cluster 2 is: 1.26552428863386"
#> [1] "Percent of hierCluster 3 in SC cluster 3 is: 0.561191452622491"
#> [1] "Percent of hierCluster 3 in SC cluster 4 is: 65.7231657231657"
#> [1] "Percent of hierCluster 4 in SC cluster 1 is: 3.99545375065571"
#> [1] "Percent of hierCluster 4 in SC cluster 2 is: 24.5653616456536"
#> [1] "Percent of hierCluster 4 in SC cluster 3 is: 14.2268445839874"
#> [1] "Percent of hierCluster 4 in SC cluster 4 is: 6.6013986013986"
Hierarchical clustering, k = 5:
trans_cluster <- cutree(hc, k = 5) %>% enframe()
write.table(trans_cluster, "Files/trans_cluster_hc_k5.txt", quote = F, row.names = F, sep = "\t")
table(trans_cluster$value)
#>
#> 1 2 3 4 5
#> 8091 7238 4501 2532 1209
summK3 <- summ(trans_cluster, tpm2)
summK3$value <- ifelse(summK3$value == "1", "e8.5",
ifelse(summK3$value == "2", "e9.5",
ifelse(summK3$value == "3", "e7.5", summK3$value)))
summK3$value <- ifelse(summK3$value == "e8.5", "2",
ifelse(summK3$value == "e9.5", "3",
ifelse(summK3$value == "e7.5", "1", summK3$value)))
plotClus(summK3, "Hierarchical Clustering of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summK32 <- dplyr::distinct(summK3[,c("value", "transcripts")])
summK32 <- summK32[order(summK32$transcripts),]
For K-means clustering:
set.seed(123)
km <- kmeans(tpm2, centers = 5)
trans_cluster_kmeans <- km$cluster %>% enframe()
write.table(trans_cluster_kmeans, "Files/trans_cluster_kmeans_k5.txt", quote = F, row.names = F, sep = "\t")
summKmeans <- summ(trans_cluster_kmeans, tpm2)
plotClus(summKmeans, "K-means Clustering of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summKmeans2 <- dplyr::distinct(summKmeans[,c("value", "transcripts")])
summKmeans2 <- summKmeans2[order(summKmeans2$transcripts),]
t <- summKmeans2
for (i in 1:5) {
for (j in 1:5) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in Kmeans cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in Kmeans cluster 1 is: 1.33031794598909"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 2 is: 54.8071034905083"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 3 is: 1.23201159540325"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 4 is: 0.522980991652419"
#> [1] "Percent of hierCluster 1 in Kmeans cluster 5 is: 8.20292781423523"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 1 is: 8.08178223550969"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 2 is: 1.37614678899083"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 3 is: 43.4180881879627"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 4 is: 4.54020777222008"
#> [1] "Percent of hierCluster 2 in Kmeans cluster 5 is: 24.9640435586604"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 1 is: 25.3207455821835"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 2 is: 0.966525223950967"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 3 is: 8.67488711358111"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 4 is: 45.2761296211775"
#> [1] "Percent of hierCluster 3 in Kmeans cluster 5 is: 0.49755664149267"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 1 is: 1.49146451033243"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 2 is: 20.8518848322944"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 3 is: 0.372750642673522"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 4 is: 12.2046693694953"
#> [1] "Percent of hierCluster 4 in Kmeans cluster 5 is: 2.19712207952963"
#> [1] "Percent of hierCluster 5 in Kmeans cluster 1 is: 0.0694123091161499"
#> [1] "Percent of hierCluster 5 in Kmeans cluster 2 is: 4.87467322774104"
#> [1] "Percent of hierCluster 5 in Kmeans cluster 3 is: 1.29626737466812"
#> [1] "Percent of hierCluster 5 in Kmeans cluster 4 is: 0.164375373580395"
#> [1] "Percent of hierCluster 5 in Kmeans cluster 5 is: 17.7178515712057"
For self-organizing maps:
library("kohonen")
set.seed(123)
som <- som(as.matrix(tpm2), grid = somgrid(5, 1, "rectangular"))
trans_cluster_som <- som$unit.classif %>% enframe()
trans_cluster_som$name <- row.names(tpm2)
Since this run could take time, I saved the results to reproduce the plot here.
trans_cluster_som <- read.table("Files/trans_cluster_som_k5.txt", header = T)
summSOM <- summ(trans_cluster_som, tpm2)
plotClus(summSOM, "Self-organizing Maps of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSOM2 <- dplyr::distinct(summSOM[,c("value", "transcripts")])
summSOM2 <- summSOM2[order(summSOM2$transcripts),]
t <- summSOM2
for (i in 1:5) {
for (j in 1:5) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in SOM cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in SOM cluster 1 is: 59.1938250428816"
#> [1] "Percent of hierCluster 1 in SOM cluster 2 is: 3.89652914210871"
#> [1] "Percent of hierCluster 1 in SOM cluster 3 is: 1.97416780426336"
#> [1] "Percent of hierCluster 1 in SOM cluster 4 is: 0.0793178663493952"
#> [1] "Percent of hierCluster 1 in SOM cluster 5 is: 7.12646974476627"
#> [1] "Percent of hierCluster 2 in SOM cluster 1 is: 1.92429521697814"
#> [1] "Percent of hierCluster 2 in SOM cluster 2 is: 40.4585878067074"
#> [1] "Percent of hierCluster 2 in SOM cluster 3 is: 31.3938555764102"
#> [1] "Percent of hierCluster 2 in SOM cluster 4 is: 4.70579233300176"
#> [1] "Percent of hierCluster 2 in SOM cluster 5 is: 2.58764607679466"
#> [1] "Percent of hierCluster 3 in SOM cluster 1 is: 0.317195325542571"
#> [1] "Percent of hierCluster 3 in SOM cluster 2 is: 1.37303556658395"
#> [1] "Percent of hierCluster 3 in SOM cluster 3 is: 16.7401294194879"
#> [1] "Percent of hierCluster 3 in SOM cluster 4 is: 57.7258758451137"
#> [1] "Percent of hierCluster 3 in SOM cluster 5 is: 5.72760227861212"
#> [1] "Percent of hierCluster 4 in SOM cluster 1 is: 8.51884832294449"
#> [1] "Percent of hierCluster 4 in SOM cluster 2 is: 1.05742203185651"
#> [1] "Percent of hierCluster 4 in SOM cluster 3 is: 0.597713097713098"
#> [1] "Percent of hierCluster 4 in SOM cluster 4 is: 3.47682119205298"
#> [1] "Percent of hierCluster 4 in SOM cluster 5 is: 39.5738203957382"
#> [1] "Percent of hierCluster 5 in SOM cluster 1 is: 8.5947416137806"
#> [1] "Percent of hierCluster 5 in SOM cluster 2 is: 11.4551637730446"
#> [1] "Percent of hierCluster 5 in SOM cluster 3 is: 0.202934748673119"
#> [1] "Percent of hierCluster 5 in SOM cluster 4 is: 0.0147037200411704"
#> [1] "Percent of hierCluster 5 in SOM cluster 5 is: 1.97657393850659"
For spectral clustering:
library("RclusTool")
set.seed(123)
sim <- computeGaussianSimilarity(tpm2, 1)
res <- spectralClustering(sim, K=5)
trans_cluster_sc <- data_frame(row.names(tpm2), res$label)
colnames(trans_cluster_sc) <- c("name", "value")
Plot of transcripts:
trans_cluster_sc <- read.table("Files/trans_cluster_sc_k5.txt", header = T)
summSC <- summ(trans_cluster_sc, tpm2)
plotClus(summSC, "Spectral Clustering of Transcripts, k = 5")
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
summSC2 <- dplyr::distinct(summSC[,c("value", "transcripts")])
summSC2 <- summSC2[order(summSC2$transcripts),]
t <- summSC2
for (i in 1:5) {
for (j in 1:5) {
q <- length(intersect(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])) #number of observed successes
print(paste("Percent of hierCluster", i, "in SC cluster", j, "is:", q/length(unique(c(summK32[summK32$value == i, "transcripts"], t[t$value == j, "transcripts"])))*100))
}
}
#> [1] "Percent of hierCluster 1 in SC cluster 1 is: 0.288350634371396"
#> [1] "Percent of hierCluster 1 in SC cluster 2 is: 0.314369739377345"
#> [1] "Percent of hierCluster 1 in SC cluster 3 is: 0.638162093171666"
#> [1] "Percent of hierCluster 1 in SC cluster 4 is: 5.24638180565127"
#> [1] "Percent of hierCluster 1 in SC cluster 5 is: 55.0709939148073"
#> [1] "Percent of hierCluster 2 in SC cluster 1 is: 12.5867406853849"
#> [1] "Percent of hierCluster 2 in SC cluster 2 is: 2.64961169483783"
#> [1] "Percent of hierCluster 2 in SC cluster 3 is: 5.54489230113031"
#> [1] "Percent of hierCluster 2 in SC cluster 4 is: 57.9278649215706"
#> [1] "Percent of hierCluster 2 in SC cluster 5 is: 1.75126361500676"
#> [1] "Percent of hierCluster 3 in SC cluster 1 is: 13.6875439831105"
#> [1] "Percent of hierCluster 3 in SC cluster 2 is: 50.866085294469"
#> [1] "Percent of hierCluster 3 in SC cluster 3 is: 15.3551842877184"
#> [1] "Percent of hierCluster 3 in SC cluster 4 is: 3.18796577422026"
#> [1] "Percent of hierCluster 3 in SC cluster 5 is: 1.10584518167457"
#> [1] "Percent of hierCluster 4 in SC cluster 1 is: 1.54754632457748"
#> [1] "Percent of hierCluster 4 in SC cluster 2 is: 10.5020920502092"
#> [1] "Percent of hierCluster 4 in SC cluster 3 is: 0.953932061423918"
#> [1] "Percent of hierCluster 4 in SC cluster 4 is: 1.294850252051"
#> [1] "Percent of hierCluster 4 in SC cluster 5 is: 21.255032625295"
#> [1] "Percent of hierCluster 5 in SC cluster 1 is: 0.356066831005204"
#> [1] "Percent of hierCluster 5 in SC cluster 2 is: 0.0151538111835127"
#> [1] "Percent of hierCluster 5 in SC cluster 3 is: 0.06635700066357"
#> [1] "Percent of hierCluster 5 in SC cluster 4 is: 8.72213424290413"
#> [1] "Percent of hierCluster 5 in SC cluster 5 is: 6.87914623593885"
Markers were collected from 5 review papers (see References).
We first load necessary files:
t2g <- read.table("Files/t2g.txt", header = T, sep = "\t")
e7.5specific <- read.table("Files/e7.5specific_ensGenes.txt", header = F)
e7.5hier <- read.table("Files/e7.5Group.txt", header = T)
e8.5specific <- read.table("Files/e8.5specific_ensGenes.txt", header = F)
e8.5hier <- read.table("Files/e8.5Group.txt", header = T)
e9.5specific <- read.table("Files/e9.5specific_ensGenes.txt", header = F)
e9.5hier <- read.table("Files/e9.5Group.txt", header = T)
Trophoblast giant cell differentiation markers:
tgc <- read.table("Files/0_e7.5_TGCdiffMarkers.txt", header = F)
tgc <- dplyr::inner_join(tgc, t2g[,2:3], by = c("V1" = "ext_gene"))
tgc <- dplyr::distinct(tgc)
tgc[tgc$ens_gene %in% e7.5specific$V1,]
#> V1 ens_gene
#> 1 Hand1 ENSMUSG00000037335
#> 3 Ascl2 ENSMUSG00000009248
#> 8 Gata3 ENSMUSG00000015619
#> 17 Pthlh ENSMUSG00000048776
#> 25 Gmnn ENSMUSG00000006715
tgc[tgc$ens_gene %in% e7.5hier$ens_gene,]
#> V1 ens_gene
#> 1 Hand1 ENSMUSG00000037335
#> 2 Mdfi ENSMUSG00000032717
#> 3 Ascl2 ENSMUSG00000009248
#> 5 Eed ENSMUSG00000030619
#> 6 Snai1 ENSMUSG00000042821
#> 7 Gata2 ENSMUSG00000015053
#> 8 Gata3 ENSMUSG00000015619
#> 9 Sox15 ENSMUSG00000041287
#> 10 Lif ENSMUSG00000034394
#> 11 Lifr ENSMUSG00000054263
#> 12 Socs3 ENSMUSG00000053113
#> 13 Jak1 ENSMUSG00000028530
#> 16 Rxrb ENSMUSG00000039656
#> 17 Pthlh ENSMUSG00000048776
#> 18 Fbxw7 ENSMUSG00000028086
#> 21 Ccne2 ENSMUSG00000028212
#> 22 Cdkn1c ENSMUSG00000037664
#> 23 Trp53 ENSMUSG00000059552
#> 25 Gmnn ENSMUSG00000006715
tgc[tgc$ens_gene %in% e8.5specific$V1,]
#> V1 ens_gene
#> 5 Eed ENSMUSG00000030619
tgc[tgc$ens_gene %in% e8.5hier$ens_gene,]
#> V1 ens_gene
#> 2 Mdfi ENSMUSG00000032717
#> 5 Eed ENSMUSG00000030619
#> 11 Lifr ENSMUSG00000054263
#> 15 Rxra ENSMUSG00000015846
#> 18 Fbxw7 ENSMUSG00000028086
#> 19 Chm ENSMUSG00000025531
#> 20 Ccne1 ENSMUSG00000002068
#> 21 Ccne2 ENSMUSG00000028212
#> 22 Cdkn1c ENSMUSG00000037664
#> 23 Trp53 ENSMUSG00000059552
#> 25 Gmnn ENSMUSG00000006715
dim(tgc[tgc$ens_gene %in% e8.5hier$ens_gene,])
#> [1] 11 2
tgc[tgc$ens_gene %in% e9.5specific$V1,]
#> V1 ens_gene
#> 4 Bhlhe40 ENSMUSG00000030103
#> 11 Lifr ENSMUSG00000054263
#> 15 Rxra ENSMUSG00000015846
#> 24 Mfn2 ENSMUSG00000029020
tgc[tgc$ens_gene %in% e9.5hier$ens_gene,]
#> V1 ens_gene
#> 4 Bhlhe40 ENSMUSG00000030103
#> 11 Lifr ENSMUSG00000054263
#> 14 Stat3 ENSMUSG00000004040
#> 15 Rxra ENSMUSG00000015846
#> 21 Ccne2 ENSMUSG00000028212
#> 22 Cdkn1c ENSMUSG00000037664
#> 23 Trp53 ENSMUSG00000059552
#> 24 Mfn2 ENSMUSG00000029020
#> 26 Procr ENSMUSG00000027611
dim(tgc[tgc$ens_gene %in% e9.5hier$ens_gene,])
#> [1] 9 2
Ectoplacental cone and spongiotrophoblast maintainance markers:
epcSpt <- read.table("Files/0_e7.5_EPC-SGT-markers.txt", header = F)
epcSpt <- dplyr::inner_join(epcSpt, t2g[,2:3], by = c("V1" = "ext_gene"))
epcSpt <- dplyr::distinct(epcSpt)
epcSpt[epcSpt$ens_gene %in% e7.5specific$V1,]
#> V1 ens_gene
#> 1 Ascl2 ENSMUSG00000009248
#> 5 Ppard ENSMUSG00000002250
#> 16 Gjb5 ENSMUSG00000042357
#> 23 Ets2 ENSMUSG00000022895
epcSpt[epcSpt$ens_gene %in% e7.5hier$ens_gene,]
#> V1 ens_gene
#> 1 Ascl2 ENSMUSG00000009248
#> 2 Sp1 ENSMUSG00000001280
#> 3 Sp3 ENSMUSG00000027109
#> 5 Ppard ENSMUSG00000002250
#> 8 Hif1a ENSMUSG00000021109
#> 9 Cited1 ENSMUSG00000051159
#> 10 Cited2 ENSMUSG00000039910
#> 14 Krt19 ENSMUSG00000020911
#> 15 Gjb3 ENSMUSG00000042367
#> 16 Gjb5 ENSMUSG00000042357
#> 17 Birc6 ENSMUSG00000024073
#> 18 Hopx ENSMUSG00000059325
#> 22 Hsf1 ENSMUSG00000022556
#> 23 Ets2 ENSMUSG00000022895
dim(epcSpt[epcSpt$ens_gene %in% e7.5hier$ens_gene,])
#> [1] 14 2
epcSpt[epcSpt$ens_gene %in% e8.5specific$V1,]
#> V1 ens_gene
#> 2 Sp1 ENSMUSG00000001280
#> 7 Arnt ENSMUSG00000015522
#> 11 Dnmt3l ENSMUSG00000000730
epcSpt[epcSpt$ens_gene %in% e8.5hier$ens_gene,]
#> V1 ens_gene
#> 2 Sp1 ENSMUSG00000001280
#> 4 Pparg ENSMUSG00000000440
#> 5 Ppard ENSMUSG00000002250
#> 7 Arnt ENSMUSG00000015522
#> 8 Hif1a ENSMUSG00000021109
#> 9 Cited1 ENSMUSG00000051159
#> 11 Dnmt3l ENSMUSG00000000730
#> 17 Birc6 ENSMUSG00000024073
#> 22 Hsf1 ENSMUSG00000022556
dim(epcSpt[epcSpt$ens_gene %in% e8.5hier$ens_gene,])
#> [1] 9 2
epcSpt[epcSpt$ens_gene %in% e9.5specific$V1,]
#> V1 ens_gene
#> 4 Pparg ENSMUSG00000000440
#> 6 Epas1 ENSMUSG00000024140
#> 13 Krt18 ENSMUSG00000023043
#> 21 Egfr ENSMUSG00000020122
epcSpt[epcSpt$ens_gene %in% e9.5hier$ens_gene,]
#> V1 ens_gene
#> 4 Pparg ENSMUSG00000000440
#> 6 Epas1 ENSMUSG00000024140
#> 10 Cited2 ENSMUSG00000039910
#> 12 Krt8 ENSMUSG00000049382
#> 13 Krt18 ENSMUSG00000023043
#> 17 Birc6 ENSMUSG00000024073
#> 19 Tln1 ENSMUSG00000028465
#> 21 Egfr ENSMUSG00000020122
#> 22 Hsf1 ENSMUSG00000022556
dim(epcSpt[epcSpt$ens_gene %in% e9.5hier$ens_gene,])
#> [1] 9 2
Chorioallantoic attachment markers:
chorioAll <- read.table("Files/0_e8.5_chorioallantoicAttachment.txt", header = F)
chorioAll <- dplyr::left_join(chorioAll, t2g[,2:3], by = c("V1" = "ext_gene"))
chorioAll <- dplyr::distinct(chorioAll)
chorioAll[chorioAll$ens_gene %in% e7.5specific$V1,]
#> V1 ens_gene
#> 21 Ctbp2 ENSMUSG00000030970
#> 23 Grb2 ENSMUSG00000059923
chorioAll[chorioAll$ens_gene %in% e7.5hier$ens_gene,]
#> V1 ens_gene
#> 6 Dnajb6 ENSMUSG00000029131
#> 11 Cdx2 ENSMUSG00000029646
#> 12 Plpp3 ENSMUSG00000028517
#> 13 Ccnf ENSMUSG00000072082
#> 16 Esrrb ENSMUSG00000021255
#> 17 Fgfr2 ENSMUSG00000030849
#> 21 Ctbp2 ENSMUSG00000030970
#> 22 Ctbp1 ENSMUSG00000037373
#> 23 Grb2 ENSMUSG00000059923
#> 25 Wnt7b ENSMUSG00000022382
dim(chorioAll[chorioAll$ens_gene %in% e7.5hier$ens_gene,])
#> [1] 10 2
chorioAll[chorioAll$ens_gene %in% e8.5specific$V1,]
#> V1 ens_gene
#> 2 Bmp5 ENSMUSG00000032179
#> 3 Dnmt1 ENSMUSG00000004099
#> 4 Itga4 ENSMUSG00000027009
#> 15 Ubr5 ENSMUSG00000037487
#> 20 Tbx4 ENSMUSG00000000094
#> 24 Rbpj ENSMUSG00000039191
#> 25 Wnt7b ENSMUSG00000022382
chorioAll[chorioAll$ens_gene %in% e8.5hier$ens_gene,]
#> V1 ens_gene
#> 1 Bmp7 ENSMUSG00000008999
#> 2 Bmp5 ENSMUSG00000032179
#> 3 Dnmt1 ENSMUSG00000004099
#> 4 Itga4 ENSMUSG00000027009
#> 6 Dnajb6 ENSMUSG00000029131
#> 7 Lef1 ENSMUSG00000027985
#> 13 Ccnf ENSMUSG00000072082
#> 15 Ubr5 ENSMUSG00000037487
#> 17 Fgfr2 ENSMUSG00000030849
#> 20 Tbx4 ENSMUSG00000000094
#> 23 Grb2 ENSMUSG00000059923
#> 24 Rbpj ENSMUSG00000039191
#> 25 Wnt7b ENSMUSG00000022382
dim(chorioAll[chorioAll$ens_gene %in% e8.5hier$ens_gene,])
#> [1] 13 2
chorioAll[chorioAll$ens_gene %in% e9.5specific$V1,]
#> V1 ens_gene
#> 10 Vcam1 ENSMUSG00000027962
#> 19 Smad1 ENSMUSG00000031681
#> 20 Tbx4 ENSMUSG00000000094
chorioAll[chorioAll$ens_gene %in% e9.5hier$ens_gene,]
#> V1 ens_gene
#> 6 Dnajb6 ENSMUSG00000029131
#> 10 Vcam1 ENSMUSG00000027962
#> 14 Ccn1 ENSMUSG00000028195
#> 17 Fgfr2 ENSMUSG00000030849
#> 19 Smad1 ENSMUSG00000031681
#> 20 Tbx4 ENSMUSG00000000094
#> 22 Ctbp1 ENSMUSG00000037373
#> 26 Zfp36l1 ENSMUSG00000021127
dim(chorioAll[chorioAll$ens_gene %in% e9.5hier$ens_gene,])
#> [1] 8 2
Labyrinth branching and vascularization - Syncytiotrophoblast markers:
laby <- read.table("Files/0_e9.5_branching-labyrinthMarkers.txt", header = F)
laby <- dplyr::left_join(laby, t2g[,2:3], by = c("V1" = "ext_gene"))
laby <- dplyr::distinct(laby)
laby[laby$ens_gene %in% e7.5specific$V1,]
#> V1 ens_gene
#> 6 Grb2 ENSMUSG00000059923
#> 10 Junb ENSMUSG00000052837
#> 21 Sos1 ENSMUSG00000024241
#> 38 Ctbp2 ENSMUSG00000030970
#> 64 Ube2l3 ENSMUSG00000038965
#> 67 Dll4 ENSMUSG00000027314
#> 74 Tfap2c ENSMUSG00000028640
head(laby[laby$ens_gene %in% e7.5hier$ens_gene,])
#> V1 ens_gene
#> 2 Fgfr2 ENSMUSG00000030849
#> 4 Gab1 ENSMUSG00000031714
#> 6 Grb2 ENSMUSG00000059923
#> 8 Hsp90ab1 ENSMUSG00000023944
#> 9 Itgav ENSMUSG00000027087
#> 10 Junb ENSMUSG00000052837
dim(laby[laby$ens_gene %in% e7.5hier$ens_gene,])
#> [1] 32 2
laby[laby$ens_gene %in% e8.5specific$V1,]
#> V1 ens_gene
#> 27 Itga4 ENSMUSG00000027009
#> 28 Arnt ENSMUSG00000015522
#> 31 Bmp5 ENSMUSG00000032179
#> 43 Ubr5 ENSMUSG00000037487
#> 44 Mapk1 ENSMUSG00000063358
#> 47 Igf2 ENSMUSG00000048583
#> 51 Ubp1 ENSMUSG00000009741
#> 58 Akt1 ENSMUSG00000001729
#> 63 Rock2 ENSMUSG00000020580
#> 70 Hey2 ENSMUSG00000019789
#> 73 Rbpj ENSMUSG00000039191
head(laby[laby$ens_gene %in% e8.5hier$ens_gene,])
#> V1 ens_gene
#> 2 Fgfr2 ENSMUSG00000030849
#> 4 Gab1 ENSMUSG00000031714
#> 6 Grb2 ENSMUSG00000059923
#> 8 Hsp90ab1 ENSMUSG00000023944
#> 9 Itgav ENSMUSG00000027087
#> 11 Lifr ENSMUSG00000054263
dim(laby[laby$ens_gene %in% e8.5hier$ens_gene,])
#> [1] 37 2
laby[laby$ens_gene %in% e9.5specific$V1,]
#> V1 ens_gene
#> 1 Dlx3 ENSMUSG00000001510
#> 3 Fzd5 ENSMUSG00000045005
#> 5 Gcm1 ENSMUSG00000023333
#> 11 Lifr ENSMUSG00000054263
#> 15 Met ENSMUSG00000009376
#> 17 Pdgfb ENSMUSG00000000489
#> 18 Pparg ENSMUSG00000000440
#> 19 Rxra ENSMUSG00000015846
#> 23 Wnt2 ENSMUSG00000010797
#> 25 Adra2b ENSMUSG00000058620
#> 26 Adra2c ENSMUSG00000045318
#> 32 Bmp2 ENSMUSG00000027358
#> 34 Cebpa ENSMUSG00000034957
#> 35 Cebpb ENSMUSG00000056501
#> 39 Gjb2 ENSMUSG00000046352
#> 47 Igf2 ENSMUSG00000048583
#> 50 Lama5 ENSMUSG00000015647
#> 54 Muc1 ENSMUSG00000042784
#> 62 Rb1 ENSMUSG00000022105
#> 63 Rock2 ENSMUSG00000020580
#> 68 Esx1 ENSMUSG00000023443
#> 69 Hey1 ENSMUSG00000040289
#> 71 Notch1 ENSMUSG00000026923
#> 72 Notch4 ENSMUSG00000015468
#> 76 Egfr ENSMUSG00000020122
#> 79 Krt18 ENSMUSG00000023043
#> 83 Tfeb ENSMUSG00000023990
head(laby[laby$ens_gene %in% e9.5hier$ens_gene,])
#> V1 ens_gene
#> 1 Dlx3 ENSMUSG00000001510
#> 2 Fgfr2 ENSMUSG00000030849
#> 3 Fzd5 ENSMUSG00000045005
#> 5 Gcm1 ENSMUSG00000023333
#> 8 Hsp90ab1 ENSMUSG00000023944
#> 9 Itgav ENSMUSG00000027087
dim(laby[laby$ens_gene %in% e9.5hier$ens_gene,])
#> [1] 42 2
sessionInfo()
#> R version 4.0.2 (2020-06-22)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] forcats_0.5.1 stringr_1.4.0 purrr_0.3.4 readr_2.1.2
#> [5] tidyr_1.2.0 tibble_3.1.7 tidyverse_1.3.1 dplyr_1.0.9
#> [9] ggplot2_3.3.6
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.1.2 xfun_0.31 bslib_0.3.1 haven_2.5.0
#> [5] colorspace_2.0-3 vctrs_0.4.1 generics_0.1.2 htmltools_0.5.2
#> [9] yaml_2.3.5 utf8_1.2.2 rlang_1.0.3 jquerylib_0.1.4
#> [13] pillar_1.7.0 glue_1.6.2 withr_2.5.0 DBI_1.1.3
#> [17] dbplyr_2.2.1 readxl_1.4.0 modelr_0.1.8 lifecycle_1.0.1
#> [21] cellranger_1.1.0 munsell_0.5.0 gtable_0.3.0 rvest_1.0.2
#> [25] evaluate_0.15 labeling_0.4.2 knitr_1.39 tzdb_0.3.0
#> [29] fastmap_1.1.0 fansi_1.0.3 highr_0.9 broom_0.8.0
#> [33] scales_1.2.0 backports_1.4.1 jsonlite_1.8.0 farver_2.1.0
#> [37] fs_1.5.2 hms_1.1.1 digest_0.6.29 stringi_1.7.6
#> [41] grid_4.0.2 cli_3.3.0 tools_4.0.2 magrittr_2.0.3
#> [45] sass_0.4.1 crayon_1.5.1 pkgconfig_2.0.3 ellipsis_0.3.2
#> [49] xml2_1.3.3 reprex_2.0.1 lubridate_1.8.0 assertthat_0.2.1
#> [53] rmarkdown_2.14 httr_1.4.3 rstudioapi_0.13 R6_2.5.1
#> [57] compiler_4.0.2